Import the tree price data from a CSV file.
# Load the readr and stringi packages in a single p_load() call, then read
# the tree price data into `trees`.
p_load(readr, stringi)
trees <- read_csv(file = "TreePrice.csv")
## Parsed with column specification:
## cols(
## Cost = col_double(),
## Height = col_double()
## )
# Preview the first six rows, with every column rounded to whole numbers.
head(round(trees, digits = 0), n = 6L)
## # A tibble: 6 x 2
## Cost Height
## <dbl> <dbl>
## 1 70 6
## 2 78 8
## 3 89 7
## 4 67 8
## 5 49 5
## 6 62 6
A few summary plots and numerical descriptives follow.
# Per-column numerical summary (min, quartiles, mean, max) of the data.
summary(object = trees)
## Cost Height
## Min. : 42.00 Min. : 3.000
## 1st Qu.: 67.85 1st Qu.: 6.295
## Median : 79.56 Median : 7.234
## Mean : 81.23 Mean : 7.643
## 3rd Qu.: 92.36 3rd Qu.: 8.548
## Max. :128.55 Max. :13.764
# Load lattice, then draw one histogram for each column of `trees`.
p_load(lattice)
histogram(~ Cost, data = trees)
histogram(~ Height, data = trees)
We now fit a few models.
# Fit a simple linear regression of Cost on Height and print the model
# summary. Assignment uses `<-` to match the rest of the file; the object
# name `trees.lm` is kept because the residual plot later in the file
# refers to it.
trees.lm <- lm(Cost ~ Height, data = trees)
summary(trees.lm)
##
## Call:
## lm(formula = Cost ~ Height, data = trees)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.701 -11.528 0.005 11.538 31.806
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 45.484 0.411 110.67 <2e-16 ***
## Height 4.677 0.052 89.95 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14 on 17905 degrees of freedom
## Multiple R-squared: 0.3112, Adjusted R-squared: 0.3112
## F-statistic: 8091 on 1 and 17905 DF, p-value: < 2.2e-16
# Residuals-versus-fitted scatter plot for the linear fit. The fitted model
# object is passed as `data`, so `residuals` and `fitted.values` in the
# formula are looked up among its components.
xyplot(
  residuals ~ fitted.values,
  data = trees.lm,
  aspect = 1,
  cex = 0.25,
  col = "green"
)